{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cognee GraphRAG with Multimedia files"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "source": [
    "## Load Data\n",
    "\n",
    "We will use a few sample multimedia files which we have on GitHub for easy access."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T11:54:44.613431Z",
     "start_time": "2025-06-30T11:54:44.606687Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pathlib\n",
    "\n",
    "# cognee knowledge graph will be created based on the text\n",
    "# and description of these files\n",
    "mp3_file_path = os.path.join(\n",
    "    os.path.abspath(\"\"),\n",
    "    \"../\",\n",
    "    \"examples/data/multimedia/text_to_speech.mp3\",\n",
    ")\n",
    "png_file_path = os.path.join(\n",
    "    os.path.abspath(\"\"),\n",
    "    \"../\",\n",
    "    \"examples/data/multimedia/example.png\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set environment variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T11:54:46.739157Z",
     "start_time": "2025-06-30T11:54:46.734808Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "if \"LLM_API_KEY\" not in os.environ:\n",
    "    os.environ[\"LLM_API_KEY\"] = \"\"\n",
    "\n",
    "# \"neo4j\" or \"networkx\"\n",
    "os.environ[\"GRAPH_DATABASE_PROVIDER\"] = \"kuzu\"\n",
    "# Not needed if using networkx\n",
    "# os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n",
    "# os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n",
    "# os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n",
    "\n",
    "# \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n",
    "os.environ[\"VECTOR_DB_PROVIDER\"] = \"lancedb\"\n",
    "# Not needed if using \"lancedb\" or \"pgvector\"\n",
    "# os.environ[\"VECTOR_DB_URL\"]=\"\"\n",
    "# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n",
    "\n",
    "# Relational Database provider \"sqlite\" or \"postgres\"\n",
    "os.environ[\"DB_PROVIDER\"] = \"sqlite\"\n",
    "\n",
    "# Database name\n",
    "os.environ[\"DB_NAME\"] = \"cognee_db\"\n",
    "\n",
    "# Postgres specific parameters (Only if Postgres or PGVector is used)\n",
    "# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n",
    "# os.environ[\"DB_PORT\"]=\"5432\"\n",
    "# os.environ[\"DB_USERNAME\"]=\"cognee\"\n",
    "# os.environ[\"DB_PASSWORD\"]=\"cognee\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "\u001b[2m2025-10-22T17:58:21.914432\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mDeleted old log file: /Users/daulet/Desktop/dev/cognee-claude/logs/2025-10-22_18-20-40.log\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:22.759223\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mLogging initialized           \u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m \u001b[36mcognee_version\u001b[0m=\u001b[35m0.3.6-local\u001b[0m \u001b[36mdatabase_path\u001b[0m=\u001b[35m/Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases\u001b[0m \u001b[36mgraph_database_name\u001b[0m=\u001b[35m\u001b[0m \u001b[36mos_info\u001b[0m=\u001b[35m'Darwin 24.5.0 (Darwin Kernel Version 24.5.0: Tue Apr 22 19:54:43 PDT 2025; root:xnu-11417.121.6~2/RELEASE_ARM64_T8132)'\u001b[0m \u001b[36mpython_version\u001b[0m=\u001b[35m3.10.11\u001b[0m \u001b[36mrelational_config\u001b[0m=\u001b[35mcognee_db\u001b[0m \u001b[36mstructlog_version\u001b[0m=\u001b[35m25.4.0\u001b[0m \u001b[36mvector_config\u001b[0m=\u001b[35mlancedb\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:22.759643\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mDatabase storage: /Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.3.6-local\n"
     ]
    }
   ],
   "source": [
    "import cognee\n",
    "print(cognee.__version__)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run Cognee with multimedia files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "\u001b[2m2025-10-22T17:58:24.045051\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mLoaded JSON extension         \u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:24.081025\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mDeleted Kuzu database files at /Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases/cognee_graph_kuzu\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:26.937024\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mDatabase deleted successfully.\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[1mStorage manager absolute path: /Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_cache\u001b[0m\n",
      "\n",
      "\u001b[1mDeleting cache...             \u001b[0m\n",
      "\n",
      "\u001b[1m✓ Cache deleted successfully! \u001b[0m\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "User 5c6da0e1-4bda-4b32-a6e3-ca70b884fb9a has registered.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "\u001b[2m2025-10-22T17:58:28.397580\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mPipeline run started: `981301fd-9699-5cd2-9746-577c0076b844`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:28.398001\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task started: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:28.398362\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task started: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:28.399412\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mPipeline run started: `981301fd-9699-5cd2-9746-577c0076b844`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:28.399724\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task started: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:28.400149\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task started: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:28.414674\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mRegistered loader: pypdf_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:28.415122\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mRegistered loader: text_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:28.415472\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mRegistered loader: image_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:28.415781\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mRegistered loader: audio_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:28.416132\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mRegistered loader: unstructured_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:28.416494\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mRegistered loader: advanced_pdf_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:28.416861\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mRegistered loader: beautiful_soup_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.666583\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task completed: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.667605\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task completed: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.668153\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mPipeline run completed: `981301fd-9699-5cd2-9746-577c0076b844`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.673512\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task completed: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.673986\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task completed: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.674429\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mPipeline run completed: `981301fd-9699-5cd2-9746-577c0076b844`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.686749\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mOntology file 'None' not found. No owl ontology will be attached to the graph.\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.707284\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mPipeline run started: `eea87f6e-3943-552c-b2fe-904ac1e367f0`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.707716\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task started: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.708080\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task started: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.708748\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mPipeline run started: `eea87f6e-3943-552c-b2fe-904ac1e367f0`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.709019\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task started: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.709373\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task started: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.716846\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mAsync Generator task started: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.720657\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mAsync Generator task started: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.725864\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task started: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:32.731948\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task started: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:36.077494\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mReconnecting to Kuzu database...\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:36.126562\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mLoaded JSON extension         \u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:36.161293\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'object' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:36.161962\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'light bulb' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:36.162356\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'profession' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:36.162703\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'programmer' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:36.163116\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'concept' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:36.163438\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'hardware problem' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:37.300377\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task started: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:38.621515\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task started: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:39.290034\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'profession' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:39.291121\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'programmers' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:39.292185\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'object' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:39.293038\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'light bulb' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:39.293777\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'concept' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:39.294485\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'hardware problem' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:39.295087\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'joke' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:39.295651\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'humor' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:40.433350\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task completed: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:40.434081\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task completed: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:40.434611\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task completed: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:40.435199\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mAsync Generator task completed: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:40.435629\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task completed: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:40.435958\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task completed: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:40.436247\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mPipeline run completed: `eea87f6e-3943-552c-b2fe-904ac1e367f0`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:40.697594\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task started: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:42.368373\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task started: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:43.185789\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task completed: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:43.186535\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task completed: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:43.186875\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task completed: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:43.187279\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mAsync Generator task completed: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:43.187623\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task completed: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:43.187953\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mCoroutine task completed: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:43.188254\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mPipeline run completed: `eea87f6e-3943-552c-b2fe-904ac1e367f0`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{UUID('849137b0-173d-5a0f-9462-403398a3b1e2'): PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('8f4e8447-24c9-5d2a-afb2-f86256ca4f34'), dataset_id=UUID('849137b0-173d-5a0f-9462-403398a3b1e2'), dataset_name='main_dataset', payload=None, data_ingestion_info=[{'run_info': PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('8f4e8447-24c9-5d2a-afb2-f86256ca4f34'), dataset_id=UUID('849137b0-173d-5a0f-9462-403398a3b1e2'), dataset_name='main_dataset', payload=None, data_ingestion_info=None), 'data_id': UUID('cc1ec4a6-2621-5143-ad19-ae7703db040b')}, {'run_info': PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('8f4e8447-24c9-5d2a-afb2-f86256ca4f34'), dataset_id=UUID('849137b0-173d-5a0f-9462-403398a3b1e2'), dataset_name='main_dataset', payload=None, data_ingestion_info=None), 'data_id': UUID('f3d53fbe-2a29-57e4-9e55-d87a49890ecc')}])}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import cognee\n",
    "\n",
    "# Create a clean slate for cognee -- reset data and system state\n",
    "await cognee.prune.prune_data()\n",
    "await cognee.prune.prune_system(metadata=True)\n",
    "\n",
    "# Add multimedia files and make them available for cognify\n",
    "await cognee.add([mp3_file_path, png_file_path])\n",
    "\n",
    "# Create knowledge graph with cognee\n",
    "await cognee.cognify()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query Cognee for summaries related to multimedia files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T11:44:56.372628Z",
     "start_time": "2025-06-30T11:44:55.978258Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "\u001b[2m2025-10-22T17:58:43.213961\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mStarting summary retrieval for query: 'What is in the multimedia files?'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:43.495466\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mFound 2 summaries from vector search\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:43.496119\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mReturning 2 summary payloads  \u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:43.496456\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mStarting completion generation for query: 'What is in the multimedia files?'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
      "\n",
      "\u001b[2m2025-10-22T17:58:43.496815\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mReturning context with 2 item(s)\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'id': 'b4da8f65-1ab7-5816-b6ca-c3b7e16d7ea9', 'created_at': 1761155918667, 'updated_at': 1761155918667, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': 'Changing a light bulb is a hardware issue for programmers.'}\n",
      "{'id': '875f97da-6b05-52af-973d-54939a229a21', 'created_at': 1761155922404, 'updated_at': 1761155922404, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': 'How many coders are needed to replace a light bulb? Zero. That’s an issue for hardware.'}\n"
     ]
    }
   ],
   "source": [
    "from cognee.api.v1.search import SearchType\n",
    "\n",
    "# Query cognee for summaries of the data in the multimedia files\n",
    "search_results = await cognee.search(\n",
    "    query_type=SearchType.SUMMARIES,\n",
    "    query_text=\"What is in the multimedia files?\",\n",
    ")\n",
    "\n",
    "# Display search results\n",
    "for result_text in search_results:\n",
    "    print(result_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
     ]
    }
   ],
   "source": [
    "# Only exit in interactive mode, not during GitHub Actions\n",
    "import os\n",
    "\n",
    "# Skip exit if we're running in GitHub Actions\n",
    "if not os.environ.get('GITHUB_ACTIONS'):\n",
    "    print(\"Exiting kernel to clean up resources...\")\n",
    "    os._exit(0)\n",
    "else:\n",
    "    print(\"Skipping kernel exit - running in GitHub Actions\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
